*****************************************************************
*********** Mining and violent competition in Africa ************
****** A.S. Rigterink, T. Ghani, J.S. Lozano, J.N. Shapiro ******
******* Replication of Tables 3, 4 and 5 of the main text *******

/*
0. Setting globals
1. Table 3
2. Table 4
3. Table 5
4. T-test (in text)
5. Back of the envelope calculation (in text)

*/
*****************************************************************

* Start from an empty session (data and previously defined programs are removed)
clear all
* Interpret the remainder of this file under Stata 17.0 rules
version 17.0
* Do not pause output with --more-- prompts
set more off
* Close a log left open by an earlier run; -capture- swallows the error if none is open
capture log close

/*--------------------------------
------- 0. Setting globals -------
----------------------------------*/

*** Directory globals
*** NB: SET YOUR DIRECTORY HERE AND UNCOMMENT THE NEXT TWO LINES IF NOT RUNNING 0-Master.do
*global dir = "C:\Temp\Dropbox\ESOC - Crisis Group\Extractives\Research\Analysis\Gridded Prediction - Berman\Replication_data\Stata"
*cd "$dir"

*** Subdirectories
* Folder locations, all relative to the current working directory
global Do_files "./Do"
global Data     "./Data"
global Results  "./Results"
global Tables   "./Tables"

*** Scripts for spatially clustered standard errors
* Run in this order; my_reg2hdfespatial is called as a command in the
* estimation sections below.
foreach script in my_ols_spatial_HAC.do my_reg2hdfespatial.ado {
	do "$Do_files/`script'"
}

*** Load data
* No -clear- option is needed because -clear all- was issued at the top of the file
use "$Data/main.dta"

/*------------------------------
------- 1. Table 3 -------
--------------------------------*/

*** Quick replication of coefficients
** NB: will not replicate correct standard errors, but will be (much) faster
/*
reghdfe acled_indicator snl_price, absorb(it cell)
reghdfe acled_indicator snl_price if asm_voted!=., absorb(it cell)
reghdfe acled_indicator snl_price asm_price snl_asm_price, absorb(it cell)
reghdfe acled_indicator snl_price asm_price snl_asm_asmprice, absorb(it cell)
reghdfe acled_indicator snl_price onlyasm_price snl_diffasm_asmprice snl_diffasm_lsmprice snl_sameasm_price, absorb(it cell)
*/

* Table 3: five specifications of acled_indicator on mining-price variables.
* Every specification uses the same panel/time identifiers, coordinates and
* cut-offs, so they are collected once in a local macro.
local spatial_opts "timevar(it) panelvar(cell) lat(y) lon(x) distcutoff(500) lagcutoff(100000)"

* (1) Berman-style regression: LSM price only
my_reg2hdfespatial acled_indicator snl_price, `spatial_opts'
estimates save "$Results/Table_main_SNL", replace

* (2) Same specification on the reduced sample where asm_voted is non-missing
my_reg2hdfespatial acled_indicator snl_price if asm_voted!=., `spatial_opts'
estimates save "$Results/Table_main_geosample", replace

* (3) Full model: LSM*ASM cells get LSM price (interaction term snl_asm_price)
my_reg2hdfespatial acled_indicator snl_price asm_price snl_asm_price, `spatial_opts'
estimates save "$Results/Table_main", replace

* (4) Full model: LSM*ASM cells get ASM price (interaction term snl_asm_asmprice)
my_reg2hdfespatial acled_indicator snl_price asm_price snl_asm_asmprice, `spatial_opts'
estimates save "$Results/Table_main_ASMprice", replace

* (5) Same versus different ASM commodity
my_reg2hdfespatial acled_indicator snl_price onlyasm_price snl_diffasm_asmprice snl_diffasm_lsmprice snl_sameasm_price, `spatial_opts'
estimates save "$Results/Table_main_sameASM", replace


/*------------------------------
------- 2. Table 4 -------
--------------------------------*/

*** Quick replication of coefficients
** NB: will not replicate correct standard errors, but will be (much) faster
/*
foreach X in acled_noreb acled_reb {
	reghdfe `X' snl_price asm_price snl_asm_price, absorb(it cell)
	reghdfe `X' snl_price asm_price snl_asm_asmprice, absorb(it cell)
}
*/


* Table 4: estimate both full models (LSM-price and ASM-price interaction)
* separately for each of the two outcome variables.
local spatial_opts "timevar(it) panelvar(cell) lat(y) lon(x) distcutoff(500) lagcutoff(100000)"

foreach outcome in acled_noreb acled_reb {
	* Interaction term priced with snl_asm_price
	my_reg2hdfespatial `outcome' snl_price asm_price snl_asm_price, `spatial_opts'
	estimates save "$Results/Table_het_`outcome'", replace

	* Interaction term priced with snl_asm_asmprice (results file gets the "_a" suffix)
	my_reg2hdfespatial `outcome' snl_price asm_price snl_asm_asmprice, `spatial_opts'
	estimates save "$Results/Table_het_`outcome'_a", replace
}


/*------------------------------
------- 3. Table 5 -------
--------------------------------*/

* Owner-type stubs looped over below. Each stub `s' is used through the
* variables `s'_price and `s'_asm_price.
*   snl_owner_types_known : paired with the owner_known_* control variables
*   snl_owner_types_c_kn  : paired with the owner_c_kn_* control variables
global snl_owner_types_known "owner_isin"
global snl_owner_types_c_kn "owner_tax owner_local owner_chn owner_oecd9"


*** Quick replication of coefficients
** NB: will not replicate correct standard errors, but will be (much) faster
/*
foreach owner in $snl_owner_types_known {
	reghdfe acled_indicator snl_price owner_known_price `owner'_price asm_price snl_asm_price owner_known_asm_price `owner'_asm_price, absorb(it cell)
}
foreach owner in $snl_owner_types_c_kn {
	reghdfe acled_indicator snl_price owner_c_kn_price `owner'_price asm_price snl_asm_price owner_c_kn_asm_price `owner'_asm_price, absorb(it cell)
}
*/

* Options common to every Table 5 specification
local spatial_opts "timevar(it) panelvar(cell) lat(y) lon(x) distcutoff(500) lagcutoff(100000)"

** Variables non-missing for non-missing owner name
foreach owner in $snl_owner_types_known {
	my_reg2hdfespatial acled_indicator snl_price owner_known_price `owner'_price asm_price snl_asm_price owner_known_asm_price `owner'_asm_price, `spatial_opts'
	estimates save "$Results/Table_het_`owner'", replace
}

** Variables non-missing for non-missing owner country of registration
foreach owner in $snl_owner_types_c_kn {
	my_reg2hdfespatial acled_indicator snl_price owner_c_kn_price `owner'_price asm_price snl_asm_price owner_c_kn_asm_price `owner'_asm_price, `spatial_opts'
	estimates save "$Results/Table_het_`owner'", replace
}


/*---------------------------------
------- 4. T-test (in text) -------
-----------------------------------*/

* Two-sample t-test of mean acled_indicator between the asm_voted groups,
* restricted to observations whose snl_price differs from zero.
* NB: in Stata a missing snl_price also satisfies "!= 0".
ttest acled_indicator if snl_price != 0, by(asm_voted)

/*-----------------------------------------------------------
------- 5. Back of the envelope calculation (in text) -------
-------------------------------------------------------------*/

* Work on a temporary copy: the data are dropped and collapsed below, and
* -restore- at the end brings back the full dataset.
preserve

*** How much do we predict?
* Full model with the snl_asm_price interaction. Only the linear prediction xb
* is requested; reghdfe's xbd (xb plus absorbed fixed effects) is not.
reghdfe acled_indicator snl_price asm_price snl_asm_price, absorb(it cell)
predict us_predict, xb

*** How much do fixed effects only predict?
* NOTE(review): -predict- without a statistic returns the default (xb), and the
* model has no regressors; confirm this is the intended fixed-effects-only
* quantity (reghdfe needs the -resid- option to predict d or xbd).
reghdfe acled_indicator if asm_voted!=., absorb(it cell)
predict fe_predict

*** To calculate net of FEs: uncomment the next line
*replace us_predict = us_predict-fe_predict

*** Different categories of cell, based on presence of LSM and suitability for ASM
*   0 = neither, 1 = LSM only, 2 = ASM only, 3 = both; missing otherwise
drop if us_predict==.
gen cat1 = 0 if asm_voted==0 & snl_dummy==0
replace cat1 = 1 if asm_voted==0 & snl_dummy==1
replace cat1 = 2 if asm_voted==1 & snl_dummy==0
replace cat1 = 3 if asm_voted==1 & snl_dummy==1

*** Setting cut-off to match real number of ACLED conflict cells
* `total' is the sum of acled_indicator over the remaining observations.
sum acled_indicator
local total = r(sum)
* -field- ranking: the highest prediction gets rank 1
egen rank = rank(us_predict), field
* Flag the top-ranked observations as predicted conflict (1) and all others as 0.
* NOTE(review): with the strict "<" and no ties this flags `total'-1
* observations rather than `total'; "<=" would match the count exactly.
* Left unchanged so the output matches the published numbers -- confirm.
gen us_predict_dum = rank < `total'

*** Amount of conflict predicted for each category of cell
gen count = 1

* One row per category: totals (rawsum) and means of predictions and outcomes
collapse (rawsum) count us_predict us_predict_dum fe_predict acled_indicator ///
	(mean) us_predict_mean=us_predict acled_mean=acled_indicator fe_predict_mean=fe_predict, ///
	by(cat1)

* Each -sum- below looks at a single collapsed row, so r(mean) is that row's value
sum us_predict_dum if cat1==1
local us_lsm = r(mean)

sum us_predict_dum if cat1==3
local us_lsm_asm = r(mean)

sum count if cat1==1
local count_lsm = r(mean)

sum count if cat1==3
local count_lsm_asm = r(mean)

*** Replicating numbers mentioned in-text
* NB: `upper' is the upper bound mentioned, `lower' is the lower bound
list

* Predicted conflict in all LSM cells (with and without ASM).
* The original displayed `total' here, so `total_viol' was never shown.
local total_viol = `us_lsm_asm'+`us_lsm'
di `total_viol'
* Cut-off used above (sum of acled_indicator); kept because the original displayed it
di `total'

* Number of observations in LSM cells (with and without ASM)
local total_count = `count_lsm' + `count_lsm_asm'
di `total_count'

* Upper bound: share of predicted LSM-cell conflict located in LSM+ASM cells
local upper = `us_lsm_asm'/(`us_lsm_asm'+`us_lsm')
di `upper'

* Predicted conflict LSM+ASM cells would have at the LSM-only rate
local subtract = (`us_lsm'/`count_lsm')*`count_lsm_asm'
di `subtract'

* Predicted conflict in LSM+ASM cells in excess of the LSM-only rate
local leaves = (`us_lsm_asm'-((`us_lsm'/`count_lsm')*`count_lsm_asm'))
di `leaves'

* Lower bound: that excess as a share of predicted LSM-cell conflict
local lower = (`us_lsm_asm'-((`us_lsm'/`count_lsm')*`count_lsm_asm'))/(`us_lsm_asm'+`us_lsm')
di `lower'

restore

